ERDDAP

# libraries
library(here)
library(glue)
library(stringr)
library(readr)
library(dplyr)
library(purrr)
library(DT)
library(rerddap)
library(sf)
library(mapview)
library(leafsync)
library(leaflet)
library(skimr)
library(lubridate)
library(plotly)
here <- here::here

# paths & variables
erddap_url <- "https://oceanview.pfeg.noaa.gov/erddap/"
d_id       <- "EB_MM_BC"
dir_data   <- here("data")
d_csv      <- glue("{dir_data}/{d_id}.csv")
segs_geo   <- glue("{dir_data}/{d_id}_segments.geojson")
d_spp_csv  <- glue("{dir_data}/{d_id}_species.csv")

Sys.setenv(RERDDAP_DEFAULT_URL = erddap_url)

if (!file.exists(d_csv)){
  d_info <- info(d_id)
  d      <- tabledap(d_info)
  write_csv(d, d_csv)
}
d <- read_csv(d_csv, guess_max = 10000)

Of the 65,531 rows in this EB_MM_BC dataset, let’s look at the first 1,000 records:

d %>% 
  slice(1:100) %>% 
  datatable()
skimr::skim(d)
Data summary
Name d
Number of rows 65531
Number of columns 31
_______________________
Column type frequency:
character 11
logical 4
numeric 15
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
beach_segment_code 0 1.00 2 4 0 43 0
unique_survey_identifier 0 1.00 13 15 0 3276 0
species_sci_name 9560 0.85 6 28 0 97 0
species_common_name 72 1.00 8 30 0 116 0
species_code 0 1.00 4 4 0 118 0
age_class 48827 0.25 1 2 0 6 0
sex 197 1.00 1 1 0 3 0
bird_or_mammal 600 0.99 4 6 0 2 0
photos 1 1.00 1 1 0 3 0
beach_segment_name 0 1.00 5 21 0 41 0
notes 24008 0.63 34 151 0 32 0

Variable type: logical

skim_variable n_missing complete_rate mean count
external_findings 65531 0 NaN :
photo_code 65531 0 NaN :
species_genus 65531 0 NaN :
datum 65531 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
latitude 1 1.00 36.71 0.35 34.05 36.68 36.79 36.83 37.09 ▁▁▁▁▇
longitude 1 1.00 -121.78 0.26 -122.28 -121.84 -121.81 -121.80 -118.92 ▇▁▁▁▁
percent_of_beach_surveyed 65531 0.00 NaN NaN NA NA NA NA NA
carcass_present 0 1.00 0.99 0.08 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
unique_carcass_identifier 0 1.00 37457.74 20443.85 2.00 20953.50 37569.00 54332.50 113257.00 ▆▇▇▁▁
species_itis 1793 0.97 207619.87 123054.01 17417.00 174536.00 175170.00 176974.00 824077.00 ▇▁▁▁▁
standard_length 65531 0.00 NaN NaN NA NA NA NA NA
condition 2 1.00 3.36 0.80 0.00 3.00 4.00 4.00 9.00 ▁▇▇▁▁
cause_of_death 2 1.00 3.94 0.47 0.00 4.00 4.00 4.00 7.00 ▁▁▇▁▁
alive 0 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
beach_length 109 1.00 4.60 0.86 0.50 4.30 4.55 5.15 5.40 ▁▁▁▃▇
north_latitude 108 1.00 36.73 0.34 34.04 36.70 36.81 36.84 37.10 ▁▁▁▁▇
north_longitude 108 1.00 -121.79 0.25 -122.28 -121.84 -121.81 -121.81 -118.93 ▇▁▁▁▁
south_latitude 108 1.00 36.70 0.33 34.04 36.66 36.76 36.81 37.08 ▁▁▁▁▇
south_longitude 108 1.00 -121.78 0.25 -122.27 -121.84 -121.81 -121.79 -118.92 ▇▁▁▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
time 0 1 1997-05-02 2016-12-05 2007-06-03 986
  • percent_of_beach_surveyed: all NaN
  • cause_of_death: > 0 1 2 3 4 5 6 7 > 818 6 302 21 64182 189 3 8

TODO: skim()

spatial

intersect sanctuary with segments

xy2ln <- function(x1, y1, x2, y2){
  if (any(is.na(c(x1, y1, x2, y2)))) return(NA)
  
  st_linestring(c(
    st_point(c(x1, y1)), 
    st_point(c(x2, y2))))
}

if (!file.exists(segs_geo)){
  
  segs <- d %>% 
    group_by(
      beach_segment_code, 
      longitude, latitude, 
      north_longitude, north_latitude,
      south_longitude, south_latitude) %>% 
    summarize(
      n_rows = n()) %>% 
    ungroup() %>% 
    mutate(
      geom = pmap(list(
        north_longitude, north_latitude,
        south_longitude, south_latitude), xy2ln) %>% 
        st_sfc(crs = 4326)) %>% 
    st_set_geometry(.$geom)
  
  write_sf(segs, segs_geo, delete_dsn = T)
}
segs <- read_sf(segs_geo)

# TODO: move used fxns into library and utility.R
source("~/github/cinms/scripts/rocky.R")

nms     <- "mbnms"
nms_ply <- get_nms_ply(nms)

nms_segs <- segs %>% 
  filter(
    st_intersects(
      segs, nms_ply, sparse = F))

m1 <- mapview(nms_ply) + 
  mapview(segs, color="red") + 
  mapview(nms_segs, color="green")
m1

NOTE the red segments near San Jose that did not get intersected by the polygon because of spatial coarseness and mismatch.

intersect buffered sanctuary with segments

Buffer sanctuary by 0.005 decimal degress (~ 0.555 km at equator).

nms_ply_buf <- st_buffer(nms_ply, dist = 0.005)

nms_buf_segs <- segs %>% 
  filter(
    st_intersects(
      segs, nms_ply_buf, sparse = F))

d_nms_buf <- d %>% 
  semi_join(
    nms_buf_segs, by = "beach_segment_code")

m2 <- mapview(nms_ply_buf) +
  mapview(nms_buf_segs, color="green")

leafsync::sync
## function (..., ncol = 2, sync = "all", sync.cursor = TRUE, no.initial.sync = TRUE) 
## {
##     latticeView(..., ncol = ncol, sync = sync, sync.cursor = sync.cursor, 
##         no.initial.sync = no.initial.sync)
## }
## <bytecode: 0x7ff0138cff50>
## <environment: namespace:leafsync>
sync(m1, m2)

Filter by beach_segment_code in MBNMS: 63,600 of 65,531 rows (97.1%).

taxa

if (!file.exists(d_spp_csv)){
  
  flds_spp <- d_info$variables$variable_name %>% 
    str_subset("species") %>% 
    c("bird_or_mammal")
  
  d_spp <- tabledap(d_info, fields = flds_spp, distinct = T) %>% 
    arrange(bird_or_mammal, species_code)
  
  write_csv(d_spp, d_spp_csv)
}
d_spp <- read_csv(d_spp_csv)

table(d_spp$bird_or_mammal)
## 
##   bird mammal 
##     88     25

birds

d_spp %>% 
  filter(bird_or_mammal == "bird") %>% 
  datatable()

mammals

d_spp %>% 
  filter(bird_or_mammal == "mammal") %>% 
  datatable()

plots

bird carcasses

static

g_birds <- d_nms_buf %>% 
  filter(
    bird_or_mammal == "bird",
    carcass_present == T) %>% 
  mutate(
    year = year(time)) %>% 
  group_by(year) %>% 
  summarize(
    n = n()) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n)) + 
  geom_bar(stat = "identity") #+ 
  #title("Bird carcasses over time")
print(g_birds)

interactive, color by species on hover

#plot.new()
g_bird_spp <- d_nms_buf %>% 
  filter(
    bird_or_mammal == "bird",
    carcass_present == T) %>% 
  mutate(
    year = year(time)) %>% 
  group_by(year, species_code) %>% 
  summarize(n = n()) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n, fill=species_code)) + 
  geom_bar(stat = "identity") + 
  #title("Bird carcasses over time, by species") + 
  theme(legend.position = "none")

ggplotly(g_bird_spp)

mammal carcasses

static

g_mammal <- d_nms_buf %>% 
  filter(
    bird_or_mammal == "mammal",
    carcass_present == T) %>% 
  mutate(
    year = year(time)) %>% 
  group_by(year) %>% 
  summarize(
    n = n()) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n)) + 
  geom_bar(stat = "identity") #+ 
  #title("Bird carcasses over time")
print(g_mammal)

interactive, color by species on hover

#plot.new()
g_mammal_spp <- d_nms_buf %>% 
  filter(
    bird_or_mammal == "mammal",
    carcass_present == T) %>% 
  mutate(
    year = year(time)) %>% 
  group_by(year, species_code) %>% 
  summarize(n = n()) %>% 
  ungroup() %>% 
  ggplot(aes(x=year, y=n, fill=species_code)) + 
  geom_bar(stat = "identity") + 
  #title("Bird carcasses over time, by species") + 
  theme(legend.position = "none")

ggplotly(g_mammal_spp)